import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import librosa
import librosa.display
import soundfile
import random
from IPython.display import display, Audio
# Demo signal: sin(t) sampled on [0, 16) with a step of 1/100.
t = np.arange(0, 16, 1/100)
sig = np.sin(t)
plt.plot(t, sig)
# Red horizontal line at y = 0 spanning the sampled range, so zero crossings
# are visible. (The trailing ';' presumably suppresses the notebook echo.)
plt.hlines(0, min(t), max(t), colors='r');
# np.sign reduces a number to its sign (-1, 0 or 1).
# sig[:-1] and sig[1:] pair every sample with its successor, so the
# comparison is True wherever the sign increases from one sample to the next.
# np.where(...)[0] turns that mask into indices; each index is the sample
# just BEFORE the upward crossing.
crossings = np.where(np.sign(sig[:-1]) < (np.sign(sig[1:])))[0]
crossings
array([ 0, 628, 1256])
# Optional: split the signal at the upward crossings found above.
# The result is not stored here; the call only demonstrates np.split.
np.split(sig, crossings);
plt.plot(t, sig)
plt.hlines(0, min(t), max(t), colors='r')
# crossings holds sample indices; dividing by 100 converts them to the time
# axis because t was sampled with a step of 1/100.
plt.vlines(crossings/100, min(sig), max(sig), colors='g');
def separate_wavesets(array, min_length):
    """Split a signal into chunks at negative -> non-negative crossings.

    A new chunk starts at sample ``i`` when ``array[i - 1] < 0``,
    ``array[i] >= 0`` and the chunk collected so far holds at least
    ``min_length`` samples. Crossings that arrive earlier are skipped, so
    only the final chunk can be shorter than ``min_length``.

    Args:
        array: One-dimensional sequence of samples.
        min_length: Minimum number of samples a chunk must contain before
            it may be closed at a crossing.

    Returns:
        A list of 1-D numpy arrays which, concatenated in order, reproduce
        the input. An empty input yields an empty list.
    """
    samples = np.asarray(array)
    if samples.size == 0:
        return []

    # np.append returns a new array instead of growing its argument, so
    # chunks cannot be accumulated sample by sample. Collect the split
    # positions instead and let np.split cut the signal once.
    # Position i is a candidate when sample i-1 is negative and sample i
    # is not.
    candidates = np.flatnonzero((samples[:-1] < 0) & (samples[1:] >= 0)) + 1

    breakpoints = []
    chunk_start = 0
    for index in candidates.tolist():
        # The min_length rule depends on the previously accepted
        # breakpoint, so this filter has to run sequentially.
        if index - chunk_start >= min_length:
            breakpoints.append(index)
            chunk_start = index

    return np.split(samples, breakpoints)
# Finding wavesets in a sound file
# Load the example recording; the keyword arguments request a 44100 Hz
# sample rate and a single (mono) channel.
path = 'violin.flac'
data, sr = librosa.load(path, sr=44100, mono=True)
# Audio player for the unmodified file, for reference.
display(Audio(path))
def wavesets_breakpoints(array: np.ndarray, min_length: int):
    """Return the indices at which ``array`` should be cut into wavesets.

    A breakpoint is placed at sample ``i`` when ``array[i] > 0`` and
    ``array[i - 1] <= 0`` (an upward zero crossing), provided that at least
    ``min_length`` samples lie between ``i`` and the previous breakpoint.

    Args:
        array: One-dimensional array of samples.
        min_length: Minimum distance, in samples, between two consecutive
            breakpoints.

    Returns:
        A list of ints in ascending order. It always starts with ``0``;
        input with fewer than two samples (including empty input) yields
        ``[0]``.
    """
    samples = np.asarray(array)
    indices = [0]
    if samples.size < 2:
        # No pair of neighbouring samples, hence no crossing to detect.
        return indices

    # Find every upward crossing in one vectorised pass. The +1 shifts the
    # index from the non-positive sample to the positive one after it.
    rising = np.flatnonzero((samples[1:] > 0.0) & (samples[:-1] <= 0.0)) + 1

    for i in rising.tolist():
        # Each decision depends on the last accepted breakpoint, so the
        # minimum-length filter stays a sequential loop.
        if i - indices[-1] >= min_length:
            indices.append(i)
    return indices
def separate_wavesets(array: np.ndarray, min_length: int):
    """Cut ``array`` into wavesets of at least ``min_length`` samples.

    The cut positions come from ``wavesets_breakpoints``. The piece in
    front of the first breakpoint is left out of the result.

    Args:
        array: One-dimensional array of samples.
        min_length: Minimum distance between two cuts, in samples.

    Returns:
        A list of numpy arrays, one per waveset, in signal order.
    """
    breakpoints = wavesets_breakpoints(array, min_length)
    pieces = np.split(array, breakpoints)
    # Drop the leading piece (everything before the first breakpoint).
    return pieces[1:]
# Split the loaded recording into wavesets; 128 is passed as min_length.
wavesets_data = separate_wavesets(data, 128)
# Inspect the second waveset.
wavesets_data[1]
array([ 1.6326904e-03, 3.0364990e-03, 4.3487549e-03, 5.5694580e-03,
6.1340332e-03, 6.4086914e-03, 6.8206787e-03, 7.2326660e-03,
6.9732666e-03, 5.8135986e-03, 4.3182373e-03, 2.7618408e-03,
6.4086914e-04, -1.6326904e-03, -3.6468506e-03, -5.0506592e-03,
-5.9509277e-03, -6.5612793e-03, -6.5460205e-03, -5.7830811e-03,
-4.6234131e-03, -3.2806396e-03, -2.6550293e-03, -2.4871826e-03,
-1.7547607e-03, 9.1552734e-05, 2.4108887e-03, 4.6386719e-03,
6.3629150e-03, 6.6833496e-03, 5.7830811e-03, 4.5318604e-03,
3.7841797e-03, 3.2806396e-03, 2.9602051e-03, 2.7008057e-03,
2.1972656e-03, 1.0070801e-03, -6.7138672e-04, -2.1972656e-03,
-3.1890869e-03, -4.2572021e-03, -5.2337646e-03, -5.3253174e-03,
-5.2185059e-03, -5.2795410e-03, -5.0659180e-03, -4.5928955e-03,
-4.3945312e-03, -3.9520264e-03, -2.2888184e-03, 4.5776367e-05,
2.6702881e-03, 4.6386719e-03, 5.6762695e-03, 5.6457520e-03,
4.9285889e-03, 4.2266846e-03, 4.3792725e-03, 4.6081543e-03,
4.5623779e-03, 3.8146973e-03, 2.3193359e-03, 1.5258789e-04,
-1.6784668e-03, -2.8076172e-03, -3.4027100e-03, -4.1046143e-03,
-5.1879883e-03, -6.4544678e-03, -7.1563721e-03, -6.9122314e-03,
-5.9967041e-03, -5.1422119e-03, -4.3945312e-03, -3.5400391e-03,
-2.7160645e-03, -1.8920898e-03, -7.7819824e-04, 6.8664551e-04,
1.8310547e-03, 2.1362305e-03, 2.3803711e-03, 3.0364990e-03,
3.6468506e-03, 3.9978027e-03, 3.9978027e-03, 3.6315918e-03,
2.9907227e-03, 2.8533936e-03, 3.2958984e-03, 3.1127930e-03,
1.7089844e-03, -3.6621094e-04, -2.3040771e-03, -3.8299561e-03,
-4.7607422e-03, -5.0354004e-03, -4.7607422e-03, -4.6081543e-03,
-4.8522949e-03, -5.2490234e-03, -4.9438477e-03, -3.7384033e-03,
-2.7160645e-03, -1.8768311e-03, -8.0871582e-04, 4.5776367e-04,
1.4495850e-03, 1.6326904e-03, 1.9073486e-03, 1.8310547e-03,
8.3923340e-04, -2.5939941e-04, -3.0517578e-05, 1.0681152e-03,
1.9683838e-03, 3.1738281e-03, 3.7689209e-03, 3.3721924e-03,
1.8768311e-03, 7.6293945e-04, 5.6457520e-04, 3.3569336e-04,
-3.5095215e-04, -1.2207031e-03, -2.0751953e-03, -2.8228760e-03,
-3.3264160e-03, -2.5329590e-03, -5.6457520e-04], dtype=float32)
# NOTE(review): the bare literals between the statements below look like
# pasted notebook cell output rather than code - confirm before running.

# Wavesets ordered by ascending sample count; sorted() builds a new list, so
# wavesets_data keeps its order.
wavesets_data_sorted = sorted(wavesets_data, key=lambda x: x.shape[0])
wavesets_data[0].shape
(156,)
len(wavesets_data)
8053
# Last element of the sorted list, i.e. the longest waveset.
wavesets_data_sorted[-1].shape
(603,)
# Join the length-sorted wavesets back into one signal and play it.
flat = np.concatenate(wavesets_data_sorted)
display(Audio(flat, rate=sr))
flat.shape
(1218263,)
# Stretch by sample repetition: allocate three times the length and write
# `flat` into each of the three interleaved strides, so every sample appears
# three times in a row.
flat_stretched = np.empty(shape=flat.shape[0] * 3)
flat_stretched[0::3] = flat
flat_stretched[1::3] = flat
flat_stretched[2::3] = flat
display(Audio(flat_stretched, rate=sr))
# STFT settings. The hop equals the window length, so consecutive analysis
# windows do not overlap.
WIN_LENGTH, HOP_LENGTH, N_FFT = 512, 512, 512

# One STFT per waveset, kept in the same order as wavesets_data.
wavesets_fft = []
for i, item in enumerate(wavesets_data):
    data_fft = librosa.stft(
        item,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        win_length=WIN_LENGTH,
    )
    wavesets_fft.append(data_fft)
/Volumes/data/git/musikinformatik-sose2021/venv/lib/python3.8/site-packages/librosa/core/spectrum.py:222: UserWarning: n_fft=512 is too small for input signal of length=326
warnings.warn(
# NOTE(review): len() on a list is constant-time, so any slowness presumably
# comes from the loop that fills wavesets_fft, not from this call - confirm.
# The value recorded below (588) also differs from the 8053 recorded for
# len(wavesets_data); possibly stale output.
len(wavesets_fft) # why is this slow?
588
# Rearranging wavesets
## Random order
# Shuffle a shallow copy so that wavesets_data itself keeps its order.
wavesets_data_shuffled = wavesets_data.copy()
random.shuffle(wavesets_data_shuffled)

# Chain the shuffled wavesets into one flat list of samples and play it.
data_shuffled = []
for sublist in wavesets_data_shuffled:
    data_shuffled.extend(sublist)
display(Audio(data_shuffled, rate=sr))
def flatten_wavesets(wavesets):
    """Concatenate a sequence of wavesets into one flat list of samples.

    Args:
        wavesets: Iterable of sample sequences (e.g. a list of 1-D arrays).

    Returns:
        A list with the samples of every waveset, in the order given.
        An empty iterable yields an empty list.
    """
    # Iterate the argument itself, not a module-level list; otherwise every
    # call returns the same data regardless of what was passed in.
    return [sample for waveset in wavesets for sample in waveset]
## test: do we get back the original ?
## Flattening the unshuffled wavesets is expected to reproduce the source
## signal in its original order.
## NOTE(review): the author observed that the result "seems not quite
## correct" - verify that flatten_wavesets really uses its argument.
original_data = flatten_wavesets(wavesets_data)
display(Audio(original_data, rate=sr))
## time stretching
# List every waveset twice in succession, then flatten and play the result.
stretched_data = []
for item in wavesets_data:
    stretched_data += [item, item]

flattened_data = flatten_wavesets(stretched_data)
display(Audio(flattened_data, rate=sr))
# Now we can write this a bit shorter
wavesets_data_shuffled = wavesets_data.copy()
# random.shuffle reorders the list in place and returns None, so it must be
# called as a statement; passing its result on would hand None to
# flatten_wavesets.
random.shuffle(wavesets_data_shuffled)
data_shuffled = flatten_wavesets(wavesets_data_shuffled)
display(Audio(data_shuffled, rate=sr))
## Play the wavesets in reverse order.
# list.reverse() works in place and returns None, which left
# reordered_wavesets empty-handed and also flipped wavesets_data for every
# later cell. Build a reversed copy instead.
reordered_wavesets = list(reversed(wavesets_data))
data_reversed = flatten_wavesets(reordered_wavesets)
display(Audio(data_reversed, rate=sr))